/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.searcher;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.logging.Logger;
import net.nutch.util.LogFormatter;
import net.nutch.analysis.NutchAnalysis;
import net.nutch.io.Writable;
/** A Nutch query. */
public final class Query implements Writable {
public static final Logger LOG =
LogFormatter.getLogger("net.nutch.searcher.Query");
/** A query clause. */
public static class Clause {
public static final String DEFAULT_FIELD = "DEFAULT";
private static final byte REQUIRED_BIT = 1;
private static final byte PROHIBITED_BIT = 2;
private static final byte PHRASE_BIT = 4;
private boolean isRequired;
private boolean isProhibited;
private String field = DEFAULT_FIELD;
private float weight = 1.0f;
private Object termOrPhrase;
public Clause(Term term, boolean isRequired, boolean isProhibited) {
this.isRequired = isRequired;
this.isProhibited = isProhibited;
this.termOrPhrase = term;
}
public Clause(Phrase phrase, boolean isRequired, boolean isProhibited) {
this.isRequired = isRequired;
this.isProhibited = isProhibited;
this.termOrPhrase = phrase;
}
public boolean isRequired() { return isRequired; }
public boolean isProhibited() { return isProhibited; }
public String getField() { return field; }
public float getWeight() { return weight; }
public void setWeight(float weight) { this.weight = weight; }
public boolean isPhrase() { return termOrPhrase instanceof Phrase; }
public Phrase getPhrase() { return (Phrase)termOrPhrase; }
public Term getTerm() { return (Term)termOrPhrase; }
public void write(DataOutput out) throws IOException {
byte bits = 0;
if (isPhrase())
bits |= PHRASE_BIT;
if (isRequired)
bits |= REQUIRED_BIT;
if (isProhibited)
bits |= PROHIBITED_BIT;
out.writeByte(bits);
out.writeUTF(field);
out.writeFloat(weight);
if (isPhrase())
getPhrase().write(out);
else
getTerm().write(out);
}
public static Clause read(DataInput in) throws IOException {
byte bits = in.readByte();
boolean required = ((bits & REQUIRED_BIT) != 0);
boolean prohibited = ((bits & PROHIBITED_BIT) != 0);
String field = in.readUTF();
float weight = in.readFloat();
Clause clause;
if ((bits & PHRASE_BIT) == 0) {
clause = new Clause(Term.read(in), required, prohibited);
} else {
clause = new Clause(Phrase.read(in), required, prohibited);
}
clause.field = field;
clause.weight = weight;
return clause;
}
public String toString() {
StringBuffer buffer = new StringBuffer();
// if (isRequired)
// buffer.append("+");
// else
if (isProhibited)
buffer.append ("-");
if (!DEFAULT_FIELD.equals(field)) {
buffer.append(field);
buffer.append(":");
}
buffer.append(termOrPhrase.toString());
return buffer.toString();
}
public boolean equals(Object o) {
if (!(o instanceof Clause)) return false;
Clause other = (Clause)o;
return
(this.isRequired == other.isRequired) &&
(this.isProhibited == other.isProhibited) &&
(this.weight == other.weight) &&
(this.termOrPhrase == null ? other.termOrPhrase == null :
this.termOrPhrase.equals(other.termOrPhrase));
}
public int hashCode() {
return
(this.isRequired ? 0 : 1) ^
(this.isProhibited ? 2 : 4) ^
Float.floatToIntBits(this.weight) ^
(this.termOrPhrase != null ? termOrPhrase.hashCode() : 0);
}
}
/** A single-term query clause. */
public static class Term {
private String text;
public Term(String text) {
this.text = text;
}
public void write(DataOutput out) throws IOException {
out.writeUTF(text);
}
public static Term read(DataInput in) throws IOException {
String text = in.readUTF();
return new Term(text);
}
public String toString() {
return text;
}
public boolean equals(Object o) {
if (!(o instanceof Term)) return false;
Term other = (Term)o;
return text == null ? other.text == null : text.equals(other.text);
}
public int hashCode() {
return text != null ? text.hashCode() : 0;
}
}
/** A phrase query clause. */
public static class Phrase {
private Term[] terms;
public Phrase(Term[] terms) {
this.terms = terms;
}
public Phrase(String[] terms) {
this.terms = new Term[terms.length];
for (int i = 0; i < terms.length; i++) {
this.terms[i] = new Term(terms[i]);
}
}
public Term[] getTerms() { return terms; }
public void write(DataOutput out) throws IOException {
out.writeByte(terms.length);
for (int i = 0; i < terms.length; i++)
terms[i].write(out);
}
public static Phrase read(DataInput in) throws IOException {
int length = in.readByte();
Term[] terms = new Term[length];
for (int i = 0; i < length; i++)
terms[i] = Term.read(in);
return new Phrase(terms);
}
public String toString() {
StringBuffer buffer = new StringBuffer();
buffer.append("\"");
for (int i = 0; i < terms.length; i++) {
buffer.append(terms[i].toString());
if (i != terms.length-1)
buffer.append(" ");
}
buffer.append("\"");
return buffer.toString();
}
public boolean equals(Object o) {
if (!(o instanceof Phrase)) return false;
Phrase other = (Phrase)o;
if (!(this.terms.length == this.terms.length))
return false;
for (int i = 0; i < terms.length; i++) {
if (!this.terms[i].equals(other.terms[i]))
return false;
}
return true;
}
public int hashCode() {
int hashCode = terms.length;
for (int i = 0; i < terms.length; i++) {
hashCode ^= terms[i].hashCode();
}
return hashCode;
}
}
private ArrayList clauses = new ArrayList();
private static final Clause[] CLAUSES_PROTO = new Clause[0];
/** Return all clauses. */
public Clause[] getClauses() {
return (Clause[])clauses.toArray(CLAUSES_PROTO);
}
/** Add a required term. */
public void addRequiredTerm(String term) {
clauses.add(new Clause(new Term(term), true, false));
}
/** Add a prohibited term. */
public void addProhibitedTerm(String term) {
clauses.add(new Clause(new Term(term), false, true));
}
/** Add a required phrase. */
public void addRequiredPhrase(String[] terms) {
if (terms.length == 0) { // ignore empty phrase
} else if (terms.length == 1) {
addRequiredTerm(terms[0]); // optimize to term query
} else {
clauses.add(new Clause(new Phrase(terms), true, false));
}
}
/** Add a prohibited phrase. */
public void addProhibitedPhrase(String[] terms) {
if (terms.length == 0) { // ignore empty phrase
} else if (terms.length == 1) {
addProhibitedTerm(terms[0]); // optimize to term query
} else {
clauses.add(new Clause(new Phrase(terms), false, true));
}
}
public void write(DataOutput out) throws IOException {
out.writeByte(clauses.size());
for (int i = 0; i < clauses.size(); i++)
((Clause)clauses.get(i)).write(out);
}
public static Query read(DataInput in) throws IOException {
Query result = new Query();
result.readFields(in);
return result;
}
public void readFields(DataInput in) throws IOException {
clauses.clear();
int length = in.readByte();
for (int i = 0; i < length; i++)
clauses.add(Clause.read(in));
}
public String toString() {
StringBuffer buffer = new StringBuffer();
for (int i = 0; i < clauses.size(); i++) {
buffer.append(clauses.get(i).toString());
if (i != clauses.size()-1)
buffer.append(" ");
}
return buffer.toString();
}
public boolean equals(Object o) {
if (!(o instanceof Query)) return false;
Query other = (Query)o;
return this.clauses.equals(other.clauses);
}
public int hashCode() {
return this.clauses.hashCode();
}
/** Flattens a query into the set of text terms that it contains. These are
* terms which should be higlighted in matching documents. */
public String[] getTerms() {
ArrayList result = new ArrayList();
for (int i = 0; i < clauses.size(); i++) {
Clause clause = (Clause)clauses.get(i);
if (!clause.isProhibited()) {
if (clause.isPhrase()) {
Term[] terms = clause.getPhrase().getTerms();
for (int j = 0; j < terms.length; j++) {
result.add(terms[j].toString());
}
} else {
result.add(clause.getTerm().toString());
}
}
}
return (String[])result.toArray(new String[result.size()]);
}
/** Parse a query from a string. */
public static Query parse(String query) throws IOException {
return NutchAnalysis.parseQuery(query);
}
/** For debugging. */
public static void main(String[] args) throws Exception {
BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
while (true) {
System.out.print("Query: ");
String line = in.readLine();
Query query = parse(line);
System.out.println("Parsed: " + query);
System.out.println("Translated: " + QueryTranslator.translate(query));
}
}
}